library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
test_data = read_csv('data/test_data.csv', show_col_types = FALSE)

test_df = 
  test_data %>% 
  select(phenotype, beta, p, description, group) %>% 
  arrange(group, phenotype) %>% 
  mutate(
    logp = -log10(test_data$p),
    direction = ifelse(test_data$beta >= 0, "Positive", "Negative"),
    xpos = row_number()) %>% 
  na.omit() 
colnames(test_data)
##  [1] "phenotype"   "snp"         "adjustment"  "beta"        "OR"         
##  [6] "SE"          "p"           "type"        "n_total"     "n_cases"    
## [11] "n_controls"  "HWE_p.min"   "allele_freq" "n_no_snp"    "k_studies"  
## [16] "tau2"        "I2.percent"  "Q"           "Q.df"        "Q.p"        
## [21] "beta.fixed"  "OR.fixed"    "SE.fixed"    "p.fixed"     "beta.random"
## [26] "OR.random"   "SE.random"   "p.random"    "description" "group"
table_cols = c( "phenotype", "description", "group", "snp", "beta","OR", "SE","p", 
               "n_total", "n_cases", "n_controls", "allele_freq" )

test_data %>% 
  select(table_cols) %>% 
  arrange(p) %>% 
  head(5) %>% 
  knitr::kable(
    col.names = c('Phenocode', 'Phenocode', 'Group', 'SNP', 'Beta', ' OR', 'SE', 'P-value',
                  'Sample Size', 'Number of Cases', 'Number of Controls','Allele Frequency')
  )
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(table_cols)
## 
##   # Now:
##   data %>% select(all_of(table_cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Phenocode Phenocode Group SNP Beta OR SE P-value Sample Size Number of Cases Number of Controls Allele Frequency
727.6 Rupture of tendon, nontraumatic musculoskeletal 1:198394253_T -0.0544859 0.9469718 0.0121102 0.0000068 1199195 22957 1176238 0.7846658
591.0 Urinary tract infection genitourinary 1:198394253_T -0.0260412 0.9742949 0.0058566 0.0000087 1240410 113316 1127094 0.7837394
727.0 Other disorders of synovium, tendon, and bursa musculoskeletal 1:198394253_T -0.0229708 0.9772910 0.0055977 0.0000407 1200348 122649 1077699 0.7833641
242.0 Thyrotoxicosis with or without goiter endocrine/metabolic 1:198394253_T 0.0455155 1.0465673 0.0135011 0.0007483 1271439 17796 1253643 0.7840630
244.4 Hypothyroidism NOS endocrine/metabolic 1:198394253_T 0.0196217 1.0198155 0.0059710 0.0010156 1318612 130098 1188514 0.7828356
# establish signficance threshold
n_tests <- nrow(test_df)
p_thresh <- 0.05 / n_tests
logp_thresh <- -log10(p_thresh)

cat_ticks = 
  test_df %>%
  group_by(group) %>%
  summarize(midpoint = mean(xpos))

# ---- Compute category boundaries & midpoints ----
cat_bounds <- test_df %>%
  group_by(group) %>%
  summarize(start = min(xpos) - 0.5, end = max(xpos) + 0.5, midpoint = mean(xpos)) %>%
  ungroup()

# ---- Create alternating shaded rectangles ----
shapes <- list(
  # Bonferroni line
  list(
    type = "line",
    x0 = 0,
    x1 = 1,
    xref = "paper",
    y0 = logp_thresh,
    y1 = logp_thresh,
    line = list(color = "red", dash = "dash")
  )
)

# Add shaded rectangles per category (alternate gray/transparent)
for (i in seq_len(nrow(cat_bounds))) {
  if (i %% 2 == 1) {  # shade every other category
    shapes <- append(shapes, list(
      list(
        type = "rect",
        xref = "x",
        yref = "paper",
        x0 = cat_bounds$start[i],
        x1 = cat_bounds$end[i],
        y0 = 0,
        y1 = 1,
        fillcolor = "lightgray",
        opacity = 0.2,
        line = list(width = 0)
      )
    ))
  }
}


interactive_plot = plot_ly(
  data = test_df,
  x = ~xpos,
  y = ~logp,
  type = "scatter",
  mode = "markers",
  color = ~group,           
  symbol = ~direction,            # shape by sign of beta
  symbols = c("triangle-down", "triangle-up"), # map Negative/Positive
  text = ~paste("Phenotype:", description,
                "<br>Phenocode:", phenotype,
                "<br>Group:", group, 
                "<br>P-value:", signif(p, 3),
                "<br>Beta:", signif(beta, 3)),
  hoverinfo = "text",
  showlegend = FALSE 
) %>%
  layout(
    title = "Interactive PheWAS Manhattan Plot",
    xaxis = list(
      title = "Phenotype",
      type = "linear", 
      tickvals = cat_ticks$midpoint,
      ticktext = cat_ticks$group
    ),
    yaxis = list(title = "-log10(P-value)"),
    shapes = shapes
      )

interactive_plot
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors